source(here("Code/RandomField.R"))
The simulation, technically, would be established on a continuous space-time domain, even though the final “observations” would be a discrete realization.
Take a Gaussian process for example:
\[Y_i(\mathbf{s}, t) = \eta_i(\mathbf{s}, t) + \epsilon_i(\mathbf{s}, t)\]
2. The true latent process is composed of a fixed process and a random (subject-specific) process.
\[\eta_i(\mathbf{s}, t) = \mu(\mathbf{s}, t)+b_i(\mathbf{s}, t)\]
\[\epsilon_i(\mathbf{s}, t) = \frac{1}{N_r}\sum_{\mathbf{s'} \in S_r}Z(\mathbf{s'}, t)\]
where:
In this section, we would like to see if the filter size of the moving average affects the spatial correlation. Here we generate a 2D image of 32 by 32. Note that the filter size should be an odd number, a convention inherited from image analysis. Though an even-sized filter is technically possible, it is much more convenient to use an odd size for many operations, since an odd-sized filter has a center pixel.
# Simulate a moving-average random field on a 32 x 32 grid for each (odd)
# filter size, storing one column per filter size, then display the fields.
ma_ksize <- expand_grid(s2 = 1:32, s1 = 1:32)
ksize_vec <- seq(1, 9, by = 2)
for (k in seq_along(ksize_vec)) {
  # flatten the simulated field into the pixel-grid column order
  field_mat <- MA_rand_field(kf = ksize_vec[k], ki = 32)
  ma_ksize[[paste0("k", ksize_vec[k])]] <- as.vector(field_mat)
}
ma_ksize %>%
  pivot_longer(starts_with("k")) %>%
  ggplot() +
  geom_tile(aes(x = s1, y = s2, fill = value)) +
  facet_wrap(~name, nrow = 1)
Below I plot the variogram function at different kernel sizes. Variogram is a measure of spatial dependence as a function of distance. Smaller value indicates greater dependence.
# Empirical variograms of the moving-average fields, one curve per filter
# size. gamma increases with distance; smaller gamma at a given distance
# indicates stronger spatial dependence.
bind_rows(
  variogram(k1~1, locations = ~s1+s2, data = ma_ksize) %>% mutate(ksize = 1),
  variogram(k3~1, locations = ~s1+s2, data = ma_ksize) %>% mutate(ksize = 3),
  variogram(k5~1, locations = ~s1+s2, data = ma_ksize) %>% mutate(ksize = 5),
  variogram(k7~1, locations = ~s1+s2, data = ma_ksize) %>% mutate(ksize = 7),
  variogram(k9~1, locations = ~s1+s2, data = ma_ksize) %>% mutate(ksize = 9)
) %>%
  ggplot()+
  geom_line(aes(x=dist, y=gamma, col=as.factor(ksize)))+
  # fix spelling of the axis label ("variagram" -> "variogram")
  labs(y="Sample variogram", col = "Filter size")
Some observations:
Here, I would like to use weighted average within a filter. Let’s fix the kernel size to be 5, and explore effect of difference weight matrix.
I would like to construct a weight matrix that decay fast from center to border. Let’s try using inverse of the exponential of the square of Euclidean distance to center:
\[w_{ij} = \frac{\exp(-\alpha d_{ij}^2)}{\sum_{k=1}^{5}\sum_{l=1}^{5} \exp(-\alpha d_{kl}^2)}\]
# Weight matrices for a 5 x 5 kernel, one slice per decay rate alpha.
# alpha = 0 gives equal weights; larger alpha concentrates weight at the
# center pixel (3, 3).
alpha_vec <- c(0, 0.5, 1, 2.5, 5)
# Weights proportional to the exponential of the negative squared Euclidean
# distance to the center (the exponential form avoids a zero denominator).
wt_dist <- array(NA, dim = c(5, 5, length(alpha_vec)))
# squared Euclidean distance of each pixel (i, j) to the kernel center (3, 3);
# outer() replaces the original triple nested loop
d2 <- outer((1:5 - 3)^2, (1:5 - 3)^2, "+")
for (m in seq_along(alpha_vec)) {
  wt_unnorm <- exp(-alpha_vec[m] * d2)
  # normalization: each weight matrix sums to 1
  wt_dist[, , m] <- wt_unnorm / sum(wt_unnorm)
}
# Collect the weight matrices into one data frame (one column per alpha)
# and display them side by side as heat maps.
df_wt <- expand.grid(s2 = 1:5, s1 = 1:5)
for (m in seq_along(alpha_vec)) {
  df_wt[[paste0("alpha", alpha_vec[m])]] <- as.vector(wt_dist[, , m])
}
df_wt %>%
  pivot_longer(starts_with("alpha")) %>%
  ggplot() +
  geom_tile(aes(x = s1, y = s2, fill = value)) +
  facet_wrap(~name, nrow = 1) +
  labs(title = "Weight matrix")
Here, when \(\alpha=5\), the border weight can get very, very close to zero (corner pixel: 4.14e-18). The center pixel takes 97.36% of the weight.
# Simulate a weighted-moving-average field on the 32 x 32 grid for each
# weight matrix (one column per alpha), then display the fields.
ma_wt <- expand_grid(s2 = 1:32, s1 = 1:32)
for (m in seq_along(alpha_vec)) {
  field_mat <- MWA_rand_field(kf = 5, ki = 32, wt = wt_dist[, , m])
  ma_wt[[paste0("alpha", alpha_vec[m])]] <- as.vector(field_mat)
}
ma_wt %>%
  pivot_longer(starts_with("alpha")) %>%
  ggplot() +
  geom_tile(aes(x = s1, y = s2, fill = value)) +
  facet_wrap(~name, nrow = 1)
# Empirical variograms of the weighted-moving-average fields, one curve per
# decay rate alpha.
bind_rows(
  variogram(alpha0~1, locations = ~s1+s2, data = ma_wt) %>% mutate(alpha = 0),
  variogram(alpha0.5~1, locations = ~s1+s2, data = ma_wt) %>% mutate(alpha = 0.5),
  variogram(alpha1~1, locations = ~s1+s2, data = ma_wt) %>% mutate(alpha = 1),
  variogram(alpha2.5~1, locations = ~s1+s2, data = ma_wt) %>% mutate(alpha = 2.5),
  variogram(alpha5~1, locations = ~s1+s2, data = ma_wt) %>% mutate(alpha = 5)
) %>%
  ggplot()+
  geom_line(aes(x=dist, y=gamma, col=as.factor(alpha)))+
  # fix spelling of the axis label ("variagram" -> "variogram")
  labs(y="Sample variogram", col = "Alpha")
Follow up on last time, we would like to generate data from the null hypothesis where \(Y_1\), \(Y_2\) are not correlated with each other. We will also remove any fixed/random effect, leaving only the moving average error, in case any unexpected correlation is induced
We generate data from the following scheme:
\[\begin{aligned} Y_{1i}(s_1, s_2, t) &=\epsilon_{1i}(s_1, s_2, t) \\ Y_{2i}(s_1, s_2, t) &=\epsilon_{2i}(s_1, s_2, t) \\ \end{aligned}\]
# set up space-time grid
# generate a 2D image of 32 by 32
nS <- 32   # image side length (pixels)
Nf <- 1000 # generate 1000 subjects over all, but use only 100 for the block bootstrap
N <- 100   # sample size used later for the block bootstrap
# one row per (filter size, subject, pixel); Y1/Y2 filled in below
df_subj <- expand_grid(ksize = ksize_vec, id=1:Nf, s2=1:nS, s1 = 1:nS)
df_subj$Y1 <- df_subj$Y2 <- NA
# generate individual scores (disabled: fixed/random effects are removed so
# only the moving-average error remains under the null)
# true_xi <- matrix(rnorm(2*N, 0, 1.5), nrow = N, ncol = 2)
# true_zeta <- matrix(rnorm(2*N, 0, 1.5), nrow = N, ncol = 2)
# generate outcomes: Y1 and Y2 are drawn independently, so any detected
# association between them is a false positive
pb <- txtProgressBar(min=0, max=Nf, style = 3)
t1 <- Sys.time()
for(i in 1:Nf){ # fix a subject
for(k in seq_along(ksize_vec)){ # fix a filter size
# random effect of this subject at this time (disabled)
# dist_it <- df_subj$dist[df_subj$id==i & df_subj$t==this_t]
# generate Y1
## a moving average error
Y1 <- MA_rand_field(ksize_vec[k], nS)
# y1_it <- true_xi[i,1]*dist_it+true_xi[i,2]*this_t + as.vector(ma_err1)
# NOTE(review): as.vector() flattens the field column-major; assumes this
# matches the expand_grid row order within a (ksize, id) cell -- confirm
# against MA_rand_field's orientation
df_subj$Y1[df_subj$ksize==ksize_vec[k] & df_subj$id==i] <- as.vector(Y1)
# generate Y2, independent of Y1 (null hypothesis of no association)
## a moving average error
Y2 <- MA_rand_field(ksize_vec[k], nS)
# y2_it <- true_zeta[i,1]*dist_it+true_zeta[i,2]*this_t + as.vector(ma_err2)
df_subj$Y2[df_subj$ksize==ksize_vec[k] & df_subj$id==i] <- as.vector(Y2)
}
setTxtProgressBar(pb, i)
}
t2 <- Sys.time()
close(pb)
# save data
# save(df_subj, file = here("Data/sim_data_ksize.RData"))
It took 7.154 minutes to generate data for 1000 subjects. Below we show an example of one subject.
# Display both generated outcomes for a single subject across filter sizes.
df_subj %>%
  filter(id == 15) %>%
  pivot_longer(starts_with("Y")) %>%
  ggplot() +
  geom_tile(aes(x = s1, y = s2, fill = value)) +
  facet_grid(cols = vars(ksize), rows = vars(name)) +
  labs(title = "Generated data of subject ID = 15")
We fit a simple linear model across space conditioning on each subject and time:
\[Y_{2i}(\mathbf{s}|t) = \beta_{it0}+\beta_{it1}Y_{1i}(\mathbf{s}|t)+\epsilon_i(\mathbf{s}|t)\]
# Scatter plots of Y2 vs Y1 for four randomly chosen subjects, with the
# least-squares fit and Pearson correlation, faceted by filter size.
df_subj %>%
  filter(id %in% sample(1:Nf, size = 4)) %>%
  ggplot(aes(x=Y1, y=Y2))+
  geom_point(size = 0.2)+
  geom_smooth(formula = 'y~x', method = "lm")+
  stat_cor(method = "pearson")+
  facet_grid(rows = vars(id), cols = vars(ksize))+
  # fix title typos ("Perason" -> "Pearson", "subject" -> "subjects")
  labs(title = "Pearson correlation of four subjects")
# Empirical type I error of the naive per-subject linear model, by filter
# size and number of subjects: the proportion of subjects whose slope on Y1
# has p < 0.05, even though the data were generated under the null.
lm_err <- data.frame(ksize = ksize_vec)
N_vec <- c(100, 200, 500, 1000)
for (n in seq_along(N_vec)) {
  err_n <- df_subj %>%
    filter(id <= N_vec[n]) %>%
    group_by(id, ksize) %>%
    group_modify(~{
      fit_lm <- lm(Y2~Y1, data = .x)
      data.frame(beta = coef(fit_lm)["Y1"],
                 pval = summary(fit_lm)$coefficients["Y1", "Pr(>|t|)"])
    }) %>%
    mutate(reject = pval < 0.05) %>%
    group_by(ksize) %>%
    # summarise() replaces the superseded summarise_at(); rows come back
    # sorted by ksize, matching lm_err$ksize
    summarise(reject = mean(reject))
  lm_err[, paste0("N", N_vec[n])] <- err_n$reject
}
# Render the type I error table; the header spans the 4 sample-size columns.
lm_err %>%
  rename("Filter size" = "ksize") %>%
  kable(table.attr = "style=\"color:black;\"") %>%
  # FALSE instead of F (F is reassignable); numeric span instead of "4"
  kable_styling(full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Type I error" = 4))
| Filter size | N100 | N200 | N500 | N1000 |
|---|---|---|---|---|
| 1 | 0.01 | 0.025 | 0.038 | 0.047 |
| 3 | 0.35 | 0.395 | 0.382 | 0.353 |
| 5 | 0.55 | 0.535 | 0.524 | 0.561 |
| 7 | 0.61 | 0.680 | 0.664 | 0.645 |
| 9 | 0.79 | 0.770 | 0.750 | 0.742 |
After increasing the sample size to 1000, the k=1 case had a 4.7% type I error, still slightly lower than the nominal value of 5%. Does it indicate that data generated under this scheme is more likely to have false rejection under simple linear regression? Would it be because of the spatial correlation, such as the spatial correlation introducing some kind of correlation between \(Y_1\) and \(Y_2\)?
In fact, when I repeated this, I got a decreasing type I error from 0.12 to 0.067. I think this is probably just random chance.
We hope to bootstrap non-overlapping blocks of pixels to preserve some degree of correlation. While the image may not be evenly divided by the block size, we try to make the expectation of image size constant, so that each block will be sampled with equal probability. For the re-sampled image, regardless of its size, we will fit a simple linear regression model for each subject and examine the significance of the slope.
PS. Originally, we wanted to set the resampling probability to be proportional to block size. But in fact, that approach would not keep the expected number of pixels the same as in the original image. The expected size of the resampled image stays unchanged only if all blocks are sampled with equal probability.
Let’s say the original image has N pixels and B blocks, and we want to resample a new image with also B blocks:
\[E(N_{new}) = E\left(\sum_{b=1}^B n_b^{new}\right) = \sum_{b=1}^B E(n_b^{new}) = \sum_{b=1}^B \sum_{b'=1}^B n_{b'} p_{b'} = \sum_{b=1}^B \sum_{b'=1}^B n_{b'} \frac{n_{b'}}{N} = B\frac{\sum_{b=1}^B n_b^2}{N}\] If we set \(p_b = 1/B\), then
\[E(N_{new}) = E\left(\sum_{b=1}^B n_b^{new}\right) = \sum_{b=1}^B E(n_b^{new}) = \sum_{b=1}^B \sum_{b'=1}^B n_{b'} p_{b'} = \sum_{b=1}^B \sum_{b'=1}^B n_{b'} \frac{1}{B} = \sum_{b=1}^B \frac{N}{B} = N\]
M <- 1000 # number of bootstrap replicates
max_size <- 9 # largest block side length
# restrict to the N = 100 subjects used for the bootstrap
df_subj_k <- df_subj %>% filter(id %in% seq_len(N))
# containers, indexed [filter size, block size, subject, replicate]
img_size <- array(NA, dim = c(length(ksize_vec), max_size, N, M))   # resampled image sizes
slope_est <- array(NA, dim = c(length(ksize_vec), max_size, N, M))  # bootstrap slope estimates
# bootstrap
pb <- txtProgressBar(0, length(ksize_vec)*max_size*N, style = 3)
ct <- 0
t1 <- Sys.time()
for (k in seq_along(ksize_vec)) { # filter size for data generation
  for (b in 1:max_size) { # block size for bootstrap
    for (i in 1:N) { # for each subject
      # A matrix of observed outcome
      this_df <- df_subj_k %>% filter(ksize == ksize_vec[k] & id == i)
      Y_mat <- matrix(this_df$Y2, nS, nS)
      # divide the matrix into non-overlapping b x b blocks
      # (edge blocks may be smaller when nS is not divisible by b)
      rblock <- (row(Y_mat)-1) %/% b + 1
      cblock <- (col(Y_mat)-1) %/% b + 1
      block_id_mat <- (rblock-1)*max(cblock) + cblock
      nblock <- max(block_id_mat)
      # sample the same number of blocks as the original image, with equal
      # probability, so the expected resampled image size equals the original
      this_df$block_id <- as.vector(block_id_mat)
      block_list <- split(this_df, f = this_df$block_id)
      for (m in 1:M) {
        # TRUE instead of T; seq_len() instead of 1:nblock
        boot_block <- sample(seq_len(nblock), size = nblock, replace = TRUE)
        boot_df <- bind_rows(block_list[boot_block])
        img_size[k, b, i, m] <- nrow(boot_df)
        # fit the per-subject model and keep the slope on Y1
        boot_lm <- summary(lm(Y2~Y1, data = boot_df))$coefficients
        slope_est[k, b, i, m] <- boot_lm["Y1", "Estimate"]
      }
      ct <- ct + 1
      setTxtProgressBar(pb, ct)
    }
  }
}
t2 <- Sys.time()
# check expectation of image size
# apply(img_size, MARGIN = 1:3, FUN = mean)
# mean(img_size)
# calculate Type I error: form the 95% bootstrap percentile interval per
# (filter size, block size, subject) and reject when it excludes zero
lqt_mat <- apply(slope_est, MARGIN = 1:3, FUN = quantile, probs = 0.025)
uqt_mat <- apply(slope_est, MARGIN = 1:3, FUN = quantile, probs = 0.975)
reject_mat <- lqt_mat > 0 | uqt_mat < 0
typeIerror <- data.frame(ksize = ksize_vec, apply(reject_mat, 1:2, mean))
typeIerror %>%
  pivot_longer(starts_with("X")) %>%
  mutate(bsize = as.numeric(gsub("X", "", name))) %>%
  ggplot()+
  geom_line(aes(x=bsize, y=value, group=as.factor(ksize), col=as.factor(ksize)))+
  geom_point(aes(x=bsize, y=value, group=as.factor(ksize), col=as.factor(ksize)))+
  geom_hline(yintercept = 0.05, col = "black", linetype = "dashed")+
  labs(x="Block size", y="Type I error", col = "Filter size")+
  scale_x_continuous(breaks = 1:max_size)
Below I show the mean and standard deviation of bootstrap slope estimates for all 100 subjects (each point represents a subject).
# mean slope estimates
# slope_mean1: [filter size, block size, subject] array of per-subject
# bootstrap means of the slope
slope_mean1 <- apply(slope_est, MARGIN = 1:3, FUN = mean)
# permute to (ksize, subject, bsize) and flatten column-major: rows cycle
# through ksize fastest, then id; columns X1..X9 are the block sizes
array(aperm(slope_mean1, perm = c(1, 3, 2)),
dim =c(N*length(ksize_vec), max_size)) %>%
data.frame() %>%
# id/ksize labels must match the column-major fill order above
mutate(id = rep(1:N, each = length(ksize_vec)),
ksize = rep(ksize_vec, N),
.before=1) %>%
# recover the numeric block size from the auto-generated X1..X9 names
pivot_longer(starts_with("X")) %>%
mutate(bsize = as.numeric(gsub("X", "", name))) %>%
ggplot()+
geom_boxplot(aes(x=bsize, y=value, group = bsize))+
geom_jitter(aes(x=bsize, y=value), size = 0.2)+
facet_wrap(~ksize, ncol = 1)+
labs(x="Block size", title="Bootstrap mean of slope estimates")+
scale_x_continuous(breaks = 1:max_size)+
geom_hline(yintercept = 0, col="red", linetype = "dashed")
# standard deviation of bootstrap slope estimates (per subject)
slope_sd1 <- apply(slope_est, MARGIN = 1:3, FUN = sd)
# same reshape as the bootstrap-mean plot: permute to (ksize, subject, bsize)
# and flatten column-major so rows cycle through ksize fastest, then id
array(aperm(slope_sd1, perm = c(1, 3, 2)),
dim =c(N*length(ksize_vec), max_size)) %>%
data.frame() %>%
# id/ksize labels must match the column-major fill order above
mutate(id = rep(1:N, each = length(ksize_vec)),
ksize = rep(ksize_vec, N),
.before=1) %>%
# recover the numeric block size from the auto-generated X1..X9 names
pivot_longer(starts_with("X")) %>%
mutate(bsize = as.numeric(gsub("X", "", name))) %>%
ggplot()+
geom_boxplot(aes(x=bsize, y=value, group = bsize))+
geom_jitter(aes(x=bsize, y=value), size = 0.2)+
facet_wrap(~ksize, ncol = 1)+
labs(x="Block size", title="Standard deviation of bootstrap estimates")+
scale_x_continuous(breaks = 1:max_size)
Below I also showed an example of the bootstrap slope estimates of one subject.
# Bootstrap slope distributions of subject 15, reshaped from
# slope_est[, , 15, ] (filter size x block size x replicate) into rows
# indexed by (ksize, bsize) and one column per bootstrap replicate.
array(slope_est[,,15,], dim = c(length(ksize_vec)*max_size, M)) %>%
  data.frame() %>%
  mutate(ksize = rep(ksize_vec, max_size),
         bsize = rep(1:max_size, each = length(ksize_vec)),
         .before = 1) %>%
  pivot_longer(starts_with("X")) %>%
  ggplot()+
  geom_boxplot(aes(x=bsize, y=value, group=bsize))+
  geom_jitter(aes(x=bsize, y=value, group=bsize), size=0.2)+
  facet_wrap(~ksize, ncol=1)+
  # fix title typo ("estiamtes" -> "estimates")
  labs(x="Block size", title = "Bootstrap slope estimates of subject 15")+
  geom_hline(yintercept = 0, col="red", linetype = "dashed")
Here the two parameters of interest are the weight matrix (in data generation process) and the block size (in bootstrap process). I will use the same weight matrix as Section 1.2, and the same block size as Section 2.1.
Just like before, I will generate data for 1000 subject, but only use 100 for the block bootstrap portion. The filter size for generating data is fixed at 5.
# set up space-time grid
# generate a 2D image of 32 by 32 (nS and Nf reused from the previous section)
# one row per (alpha, subject, pixel); Y1/Y2 filled in below
df_subj2 <- expand_grid(alpha = alpha_vec, id=1:Nf, s2=1:nS, s1 = 1:nS)
df_subj2$Y1 <- df_subj2$Y2 <- NA
# generate outcomes under the null: Y1 and Y2 are independent weighted
# moving-average fields sharing the same weight matrix
pb <- txtProgressBar(min=0, max=Nf, style = 3)
t1 <- Sys.time()
for(i in 1:Nf){ # fix a subject
  for(k in seq_along(alpha_vec)){ # fix a weight matrix
    # generate Y1: a weighted moving average error field
    Y1 <- MWA_rand_field(kf = 5, ki = 32, wt = wt_dist[,,k])
    df_subj2$Y1[df_subj2$alpha==alpha_vec[k] & df_subj2$id==i] <- as.vector(Y1)
    # generate Y2: an independent weighted moving average error field
    Y2 <- MWA_rand_field(kf = 5, ki = 32, wt = wt_dist[,,k])
    # BUG FIX: the row filter previously used df_subj$id (the data frame from
    # the earlier section) instead of df_subj2$id; it only worked by the
    # coincidence that both data frames share the same row layout
    df_subj2$Y2[df_subj2$alpha==alpha_vec[k] & df_subj2$id==i] <- as.vector(Y2)
  }
  setTxtProgressBar(pb, i)
}
t2 <- Sys.time()
close(pb)
# save data
# save(df_subj2, file = here("Data/sim_data_wt.RData"))
# Display both generated outcomes for a single subject across alpha values.
df_subj2 %>%
  filter(id == 15) %>%
  pivot_longer(starts_with("Y")) %>%
  ggplot() +
  geom_tile(aes(x = s1, y = s2, fill = value)) +
  facet_grid(cols = vars(alpha), rows = vars(name)) +
  labs(title = "Generated data of subject ID = 15")
# Scatter plots of Y2 vs Y1 for four randomly chosen subjects, with the
# least-squares fit and Pearson correlation, faceted by alpha.
df_subj2 %>%
  filter(id %in% sample(1:N, size = 4)) %>%
  ggplot(aes(x=Y1, y=Y2))+
  geom_point(size = 0.2)+
  geom_smooth(formula = 'y~x', method = "lm")+
  stat_cor(method = "pearson")+
  facet_grid(rows = vars(id), cols = vars(alpha))+
  # fix title typos ("Perason" -> "Pearson", "subject" -> "subjects")
  labs(title = "Pearson correlation of four subjects")
# Empirical type I error of the naive per-subject linear model, by alpha and
# number of subjects (N_vec reused from the previous section).
lm_err2 <- data.frame(alpha = alpha_vec)
for (n in seq_along(N_vec)) {
  err_n <- df_subj2 %>%
    filter(id <= N_vec[n]) %>%
    group_by(id, alpha) %>%
    group_modify(~{
      fit_lm <- lm(Y2~Y1, data = .x)
      data.frame(beta = coef(fit_lm)["Y1"],
                 pval = summary(fit_lm)$coefficients["Y1", "Pr(>|t|)"])
    }) %>%
    mutate(reject = pval < 0.05) %>%
    group_by(alpha) %>%
    # summarise() replaces the superseded summarise_at(); rows come back
    # sorted by alpha, matching lm_err2$alpha
    summarise(reject = mean(reject))
  lm_err2[, paste0("N", N_vec[n])] <- err_n$reject
}
# Render the type I error table; the header spans the 4 sample-size columns.
lm_err2 %>%
  kable(table.attr = "style=\"color:black;\"") %>%
  # FALSE instead of F (F is reassignable); numeric span instead of "4"
  kable_styling(full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Type I error" = 4))
| alpha | N100 | N200 | N500 | N1000 |
|---|---|---|---|---|
| 0.0 | 0.48 | 0.490 | 0.526 | 0.552 |
| 0.5 | 0.40 | 0.385 | 0.386 | 0.423 |
| 1.0 | 0.23 | 0.225 | 0.242 | 0.247 |
| 2.5 | 0.06 | 0.050 | 0.056 | 0.057 |
| 5.0 | 0.04 | 0.035 | 0.030 | 0.040 |
We will follow the same set up as Section 2.2.
# M = 1000 bootstrap replicates, N = 100 subjects, max_size = 9:
# all carried over from Section 2.2
df_subj_wt <- df_subj2 %>% filter(id %in% seq_len(N))
# containers, indexed [alpha, block size, subject, replicate]
img_size2 <- array(NA, dim = c(length(alpha_vec), max_size, N, M))   # resampled image sizes
slope_est2 <- array(NA, dim = c(length(alpha_vec), max_size, N, M))  # bootstrap slope estimates
# bootstrap
pb <- txtProgressBar(0, length(alpha_vec)*max_size*N, style = 3)
ct <- 0
t1 <- Sys.time()
for (k in seq_along(alpha_vec)) { # weight matrix (alpha) for data generation
  for (b in 1:max_size) { # block size for bootstrap
    for (i in 1:N) { # for each subject
      # A matrix of observed outcome
      this_df <- df_subj_wt %>% filter(alpha == alpha_vec[k] & id == i)
      Y_mat <- matrix(this_df$Y2, nS, nS)
      # divide the matrix into non-overlapping b x b blocks
      # (edge blocks may be smaller when nS is not divisible by b)
      rblock <- (row(Y_mat)-1) %/% b + 1
      cblock <- (col(Y_mat)-1) %/% b + 1
      block_id_mat <- (rblock-1)*max(cblock) + cblock
      nblock <- max(block_id_mat)
      # sample the same number of blocks as the original image, with equal
      # probability, so the expected resampled image size equals the original
      this_df$block_id <- as.vector(block_id_mat)
      block_list <- split(this_df, f = this_df$block_id)
      for (m in 1:M) {
        # TRUE instead of T; seq_len() instead of 1:nblock
        boot_block <- sample(seq_len(nblock), size = nblock, replace = TRUE)
        boot_df <- bind_rows(block_list[boot_block])
        img_size2[k, b, i, m] <- nrow(boot_df)
        # fit the per-subject model and keep the slope on Y1
        boot_lm <- summary(lm(Y2~Y1, data = boot_df))$coefficients
        slope_est2[k, b, i, m] <- boot_lm["Y1", "Estimate"]
      }
      ct <- ct + 1
      setTxtProgressBar(pb, ct)
    }
  }
}
t2 <- Sys.time()
# check expectation of image size
# apply(img_size2, MARGIN = 1:3, FUN = mean)
# mean(img_size2)
# calculate Type I error: form the 95% bootstrap percentile interval per
# (alpha, block size, subject) and reject when it excludes zero
lqt_mat2 <- apply(slope_est2, MARGIN = 1:3, FUN = quantile, probs = 0.025)
uqt_mat2 <- apply(slope_est2, MARGIN = 1:3, FUN = quantile, probs = 0.975)
reject_mat2 <- lqt_mat2 > 0 | uqt_mat2 < 0
typeIerror2 <- data.frame(alpha = alpha_vec, apply(reject_mat2, 1:2, mean))
typeIerror2 %>%
  pivot_longer(starts_with("X")) %>%
  mutate(bsize = as.numeric(gsub("X", "", name))) %>%
  ggplot()+
  geom_line(aes(x=bsize, y=value, group=as.factor(alpha), col=as.factor(alpha)))+
  geom_point(aes(x=bsize, y=value, group=as.factor(alpha), col=as.factor(alpha)))+
  geom_hline(yintercept = 0.05, col = "black", linetype = "dashed")+
  labs(x="Block size", y="Type I error", col = "Weight matrix")+
  scale_x_continuous(breaks = 1:max_size)
# mean slope estimates
# slope_mean2: [alpha, block size, subject] array of per-subject bootstrap
# means of the slope
slope_mean2 <- apply(slope_est2, MARGIN = 1:3, FUN = mean)
# permute to (alpha, subject, bsize) and flatten column-major: rows cycle
# through alpha fastest, then id; columns X1..X9 are the block sizes
array(aperm(slope_mean2, perm = c(1, 3, 2)),
dim =c(N*length(alpha_vec), max_size)) %>%
data.frame() %>%
# id/alpha labels must match the column-major fill order above
mutate(id = rep(1:N, each = length(alpha_vec)),
alpha = rep(alpha_vec, N),
.before=1) %>%
# recover the numeric block size from the auto-generated X1..X9 names
pivot_longer(starts_with("X")) %>%
mutate(bsize = as.numeric(gsub("X", "", name))) %>%
ggplot()+
geom_boxplot(aes(x=bsize, y=value, group = bsize))+
geom_jitter(aes(x=bsize, y=value), size = 0.2)+
facet_wrap(~alpha, ncol = 1)+
labs(x="Block size", title="Bootstrap mean of slope estimates")+
scale_x_continuous(breaks = 1:max_size)+
geom_hline(yintercept = 0, col="red", linetype = "dashed")
# standard deviation of bootstrap slope estimates (per subject)
slope_sd2 <- apply(slope_est2, MARGIN = 1:3, FUN = sd)
# same reshape as the bootstrap-mean plot: permute to (alpha, subject, bsize)
# and flatten column-major so rows cycle through alpha fastest, then id
array(aperm(slope_sd2, perm = c(1, 3, 2)),
dim =c(N*length(alpha_vec), max_size)) %>%
data.frame() %>%
# id/alpha labels must match the column-major fill order above
mutate(id = rep(1:N, each = length(alpha_vec)),
alpha = rep(alpha_vec, N),
.before=1) %>%
# recover the numeric block size from the auto-generated X1..X9 names
pivot_longer(starts_with("X")) %>%
mutate(bsize = as.numeric(gsub("X", "", name))) %>%
ggplot()+
geom_boxplot(aes(x=bsize, y=value, group = bsize))+
geom_jitter(aes(x=bsize, y=value), size = 0.2)+
facet_wrap(~alpha, ncol = 1)+
labs(x="Block size", title="Standard deviation of bootstrap estimates")+
scale_x_continuous(breaks = 1:max_size)
# Bootstrap slope distributions of subject 15, reshaped from
# slope_est2[, , 15, ] (alpha x block size x replicate) into rows indexed
# by (alpha, bsize) and one column per bootstrap replicate.
array(slope_est2[,,15,], dim = c(length(alpha_vec)*max_size, M)) %>%
  data.frame() %>%
  mutate(alpha = rep(alpha_vec, max_size),
         bsize = rep(1:max_size, each = length(alpha_vec)),
         .before = 1) %>%
  pivot_longer(starts_with("X")) %>%
  ggplot()+
  geom_boxplot(aes(x=bsize, y=value, group=bsize))+
  geom_jitter(aes(x=bsize, y=value, group=bsize), size=0.2)+
  facet_wrap(~alpha, ncol=1)+
  # fix title typo ("estiamtes" -> "estimates")
  labs(x="Block size", title = "Bootstrap slope estimates of subject 15")+
  geom_hline(yintercept = 0, col="red", linetype = "dashed")